# Figure 3 rhs
rm(list=ls())

(WD <- getwd())
if (!is.null(WD)) setwd(WD)

indata0      = paste0(WD,"/_individual_data/_spfrawdata/","", collapse = NULL)
indata1      = paste0(WD,"/_other_data/_public_signals/_hard_variables/","", collapse = NULL)
indata2      = paste0(WD,"/_other_data/_public_signals/_survey_variables/","", collapse = NULL)

library(haven)
library(AER)
library(sandwich)
library(lmtest)
library(pracma)
library(stargazer)
library(plm)
library(pracma)
library(DataCombine)
library(jtools)
library(plyr)
library(ggplot2)
library(tidyverse)
library(dotwhisker)

lagpad <- function(x, k) {
  res <- c(rep(NA, k), x)[1:length(x)]
  return(res)
}

n_hard    = 9
n_survey  = 4

load(paste(indata0,"us_spf_pgdp_ind.Rda",sep=""))
data$ID         = data$id
data            = data[data$qdate>=1981.00,]
data            = ddply(data,.(qdate),transform, cons = mean(f2,na.rm = TRUE))
data_fcast      = subset(data, select = c(qdate,fc_err, ID, p_realz))

data_tmp        = aggregate(. ~ qdate, data_fcast, mean)
data_tmp$lag    = lagpad(data_tmp$p_realz,5)
data_tmp        = subset(data_tmp, select = c(qdate, lag))

load(paste(indata1,"public_signals_hard.Rda",sep=""))
data_hard       = data
data_hard$qdate = data_hard$qdate+5/4 
data_hard       = merge(data_hard, data_fcast, by = "qdate", all = TRUE)
data_hard       = merge(data_hard, data_tmp, by= "qdate", all = TRUE)

load(paste(indata2,"public_signals_survey.Rda",sep=""))
data_survey       = data
data_survey$qdate = data_survey$qdate+5/4
data_survey       = merge(data_survey, data_fcast, by = "qdate", all = TRUE)

iter          = 0
out_beta_hard  = rep(NA, n_hard)
out_se_hard    = rep(NA, n_hard)
out_n_hard     = rep(NA, n_hard)
beta_save      = rep(NA, n_hard)
accuracy_hard  = rep(NA, n_hard)

for(ii in colnames(data_hard)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz") { 
      iter = iter + 1
      print(ii)
    
      data_hard$xtmp = scale(data_hard[[ii]])
      sign_reg       = plm(p_realz ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
      coefs          = coefficients(sign_reg)
      beta           = as.numeric(coefs[1])
      beta_save[iter] = beta
      
      if (beta>0) {
        plm_hard     = plm(fc_err ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
        beta          = coefficients(plm_hard)
        std_error     = sqrt(diag(vcovDC(plm_hard, type = "HC1")))
        
        out_beta_hard[iter]   = as.numeric(beta[1])
        out_se_hard[iter]     = as.numeric(std_error[1])
        out_n_hard[iter]      = nobs(plm_hard)
        
        
      } else if (beta<0) {
        data_hard$xtmp = -data_hard$xtmp
        
        plm_hard     = plm(fc_err ~ xtmp, data=data_hard,index=c("ID", "qdate"), model="within")
        beta          = coefficients(plm_hard)
        std_error     = sqrt(diag(vcovDC(plm_hard, type = "HC1")))
        
        out_beta_hard[iter]   = as.numeric(beta[1])
        out_se_hard[iter]     = as.numeric(std_error[1])
        out_n_hard[iter]      = nobs(plm_hard) 
        
      }
  }
}

data_acc                 = aggregate(. ~ qdate, data_hard, mean, na.action = NULL)
iter = 0
for(ii in colnames(data_acc)) {
  if (ii == "dlneer" || ii == "cf_exp_infl" || ii == "dlimp" || ii == "dloil" || ii == "dlstocks" || ii == "term" || ii == "tips" || ii == "urate" || ii == "lag") {
  iter                     = iter + 1  
  data_acc$xtmp            = scale(data_acc[[ii]])
  lm_accuracy              = lm(p_realz ~ xtmp, data = data_acc) 
  resids                   = residuals(lm_accuracy)
  accuracy_hard[iter]      = sd(resids, na.rm=TRUE)^(-2 )
  }
}

all_hard =  data.frame(out_beta_hard, out_se_hard, out_n_hard, accuracy_hard)

clean_hard = all_hard 
term = c('NEER','FIN','IMPP','OIL','STOX','TERM','TIPS','UNMP', 'LAG')
clean_hard$term = term
names(clean_hard)[1] <- "destimate"
names(clean_hard)[2] <- "std.error"
names(clean_hard)[3] <- "nr.obs"
names(clean_hard)[4] <- "accuracy"
clean_hard = clean_hard[c(9,7,1,3,4,8,2,5,6),]


iter          = 0
out_beta_survey  = rep(NA, n_survey)
out_se_survey    = rep(NA, n_survey)
out_n_survey     = rep(NA, n_survey)
beta_save        = rep(NA, n_survey)
accuracy_survey  = rep(NA, n_survey)

for(ii in colnames(data_survey)) {
  if (ii != "qdate"  && ii != "fc_err" && ii != "ID" && ii != "p_realz") { 
    iter = iter + 1
    print(ii)
    
    data_survey$xtmp = scale(data_survey[[ii]])

     plm_survey    = plm(fc_err ~ xtmp, data=data_survey,index=c("ID", "qdate"), model="within")
      beta          = coefficients(plm_survey)
      std_error     = sqrt(diag(vcovDC(plm_survey, type = "HC1")))
      
      out_beta_survey[iter]   = as.numeric(beta[1])
      out_se_survey[iter]     = as.numeric(std_error[1])
      out_n_survey[iter]      = nobs(plm_survey) 
      
  }
}


data_acc                = aggregate(. ~ qdate, data_survey, mean, na.action=NULL)
iter = 0
for(ii in colnames(data_acc)) {
 if (ii == "mich" || ii == "sce" || ii == "spf" || ii == "liv") { 
    iter                    = iter + 1
    data_acc$xtmp            = scale(data_acc[[ii]])
    lm_accuracy              = lm(p_realz ~ xtmp, data = data_acc) 
    resids                   = data_acc$p_realz - data_acc[[ii]]
    if (ii == "liv"){
      resids                = residuals(lm_accuracy)
    }
    accuracy_survey[iter]   = sd(resids, na.rm=TRUE)^(-2 )
  }
}

all_survey  =  data.frame(out_beta_survey, out_se_survey, out_n_survey, accuracy_survey)

clean_survey = all_survey 
term = c('MICH','SCE','SPF', 'LIV')
clean_survey$term = term
names(clean_survey)[1] <- "destimate"
names(clean_survey)[2] <- "std.error"
names(clean_survey)[3] <- "nr.obs"
names(clean_survey)[4] <- "accuracy"
clean_survey = clean_survey[c(3,1,2,4),]


clean_data = rbind(clean_hard,clean_survey)
clean_data= clean_data[!(clean_data$term=="TIPS"),]

#pdf("_figures/figure_4_rhs.pdf") 
#plot(clean_data$accuracy, clean_data$destimate, main="", ylim=c(-0.40,0.20), xlim = c(0,3), xaxt ="n",bty="n",
#     xlab="Precision of Public Signal ", ylab="Over-/Underreaction ", pch=19)
#axis(1, at=seq(0,3, by = 1))
#abline(lm(destimate~accuracy, data=clean_data), col="red") # regression line (y~x)
#dev.off()

pdf("_figures/figure_3_rhs.pdf") 
plot(clean_data$accuracy, clean_data$destimate, main="", ylim=c(-0.40,0.20), xlim = c(0,3), xaxt ="n",bty="n",
     xlab="Precision of Public Signal ", ylab="Over-/Underreaction ", pch=19)
axis(1, at=seq(0,3, by = 1))
abline(lm(destimate~accuracy, data=clean_data), col="black") # regression line (y~x)
dev.off()

regresion.lm = lm(destimate~ accuracy, data=clean_data)
summary(regresion.lm)
